library(tidyverse)
library(gifski)
library(ggraph)
library(here)
library(igraph)
source(here("modelFunction_rewiring.R"))

First, run the model.

# Define parameters
N = 50
edge.prob <- 0.04
burn.in = 20
burn.out = 5
pm = 0.3
ps = 0.1
pa = 0.2
add00 = c(0.5, 10)
lose01 = 0.1
add10 = 0.05
lose11 = c(0.5, 0.5)
histMultiplier = 1.2
doRemoval = TRUE
modelGraphs <- runModel(N = N, # Nodes in the network
                     edge.prob = edge.prob, 
                     burn.in = burn.in,
                     burn.out = burn.out,
                     pm = pm,
                     ps = ps, 
                     pa = pa,
                     add00 = add00, 
                     lose01 = lose01, 
                     add10 = add10,
                     lose11 = lose11, 
                     histMultiplier = histMultiplier,
                     doRemoval = doRemoval) %>%
  lapply(., function(x){
    igraph::graph_from_adjacency_matrix(x, mode = "undirected", add.colnames = "label")
  })
  1. How does variation in degree and betweenness emerge in this model?

Compute degree and betweenness for each of the model networks.

attrDF <- lapply(modelGraphs, function(x){
  x %>%
    set_vertex_attr(., name = "degree", value = degree(x)) %>%
    set_vertex_attr(., name = "betweenness", value = betweenness(x)) %>%
    tidygraph::as_tbl_graph() %>%
    tidygraph::activate(nodes) %>%
    data.frame()
  }) %>%
  data.table::rbindlist(idcol = "slice") %>%
  as.data.frame() %>%
  mutate(beforeAfter = ifelse(slice <= burn.in, "before", "after"))

# make a long-format version to use for facetted plots
attrDFLong <- attrDF %>%
    pivot_longer(cols = c(degree, betweenness), names_to = "measure", values_to = "value")

View degree and betweenness over time

# Degree and betweenness over time
attrDFLong %>%
  ggplot(aes(x = slice, y = value, col = label, group = label))+
  facet_wrap(~measure, scales = "free")+
  geom_line(alpha = 0.7, size = 0.5)+
  theme_minimal()+
  geom_vline(xintercept = burn.in+1, col = "red", size = 1)+
  theme(legend.position = "none")

# Let's just look at a few labels, because this plot is too hard to read
indivs <- sample(unique(attrDFLong$label), 5)
attrDFLong %>%
  filter(label %in% indivs) %>%
  ggplot(aes(x = slice, y = value, col = label, group = label))+
  facet_wrap(~measure, scales = "free")+
  geom_line()+
  theme_minimal()+
  geom_vline(xintercept = burn.in+1, col = "red", size = 1)

# This is still a little hard to see... let's look at just the "before" rows.
attrDFLong %>%
  filter(label %in% indivs, beforeAfter == "before") %>%
  ggplot(aes(x = slice, y = value, col = label, group = label))+
  facet_wrap(~measure, scales = "free")+
  geom_line()+
  theme_minimal()

# Okay, I'm seeing some differentiation, but no clear patterns. Because of how the model is set up, the rank order tends to be maintained for a few consecutive time steps, but not over the whole span of the baseline model. This makes sense, since labels don't have any a priori reason to have higher or lower degrees than other labels.

Now, going to compute three network-level measures before and after the removal of an label.

1. Network density deltas

How does edge density behave over time?

densities <- lapply(modelGraphs, function(x){
  edge_density(x)
 }) %>% 
  unlist() %>% 
  as.data.frame() %>%
  setNames(., "density") %>%
  mutate(slice = 1:length(modelGraphs))

densities %>%
  ggplot(aes(x = slice, y = density))+
  geom_line()+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 1, col = "red")

First delta: immediately before removal to after removal/pre-rewiring

delta1_density <- igraph::edge_density(modelGraphs[[burn.in+1]])-igraph::edge_density(modelGraphs[[burn.in]])

Second delta: after removal/pre-rewiring to post-rewiring

delta2_density <- igraph::edge_density(modelGraphs[[burn.in+2]])-igraph::edge_density(modelGraphs[[burn.in+1]])

2. Network connectivity (average path length) deltas

How does connectivity behave over time?

connectivities <- lapply(modelGraphs, function(x){
  mean_distance(x)
 }) %>% 
  unlist() %>% 
  as.data.frame() %>%
  setNames(., "connectivity") %>%
  mutate(slice = 1:length(modelGraphs))

connectivities %>%
  ggplot(aes(x = slice, y = connectivity))+
  geom_line()+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 1, col = "red")
## Warning: Removed 1 row(s) containing missing values (geom_path).

First delta: immediately before removal to after removal/pre-rewiring

delta1_meanDistance <- igraph::mean_distance(modelGraphs[[burn.in+1]])-igraph::mean_distance(modelGraphs[[burn.in]])

Second delta: after removal/pre-rewiring to post-rewiring

delta2_meanDistance <- igraph::mean_distance(modelGraphs[[burn.in+2]])-igraph::mean_distance(modelGraphs[[burn.in+1]])

3. Network modularity deltas

How does modularity behave over time?

clustered <- lapply(modelGraphs, function(x){
  x %>%
    cluster_fast_greedy()
 }) 
  
modularities <- data.frame(slice = 1:length(clustered),
                           modularity = unlist(lapply(clustered, modularity)),
                           nClusters = unlist(lapply(clustered, length)),
                           nIndivs = unlist(lapply(clustered, function(x){length(membership(x))})))

modularities %>%
  ggplot(aes(x = slice, y = modularity))+
  geom_line()+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 1, col = "red")

modularities %>%
  ggplot(aes(x = slice, y = nClusters))+
  geom_line()+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 1, col = "red")

modularities %>%
  ggplot(aes(x = slice, y = nClusters/modularity))+
  geom_line()+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 1, col = "red")

# there doesn't seem to be a pattern in the ratio of nClusters to modularity, but I will investigate on a larger scale.

Visually examine the clustering results to see if I see anything notable

communities <- clustered[burn.in:(burn.in+2)]
graphs <- modelGraphs[burn.in:(burn.in+2)]

map2(.x = communities, .y = graphs, .f = function(.x, .y){
  plot(.x, .y, vertex.size = 5)
})

## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL

How does the clustering algorithm deal with isolated nodes?

Well, we know that there are clusters of size 1. So it looks like isolated nodes are being included.

What’s the relationship between the number of isolated nodes vs. the number of clusters of size 1? In other words, how often, if ever, do we end up with a cluster of size 1 that is NOT an isolated node? I suspect that this graph is well-connected enough that this will never happen. I wonder under what conditions, if any, it would…

nIsolatedNodes <- lapply(modelGraphs, function(x){
  sum(degree(x) == 0)
}) %>% unlist()

nSingletonClusters <- lapply(clustered, function(x){
  sum(sizes(x) == 1)
}) %>% unlist()

plot(nSingletonClusters ~ nIsolatedNodes) # okay yeah, there's a perfect relationship between these. This tells us that there are no cases in which we get a cluster of size 1 that is NOT an isolated node.

Let’s examine what happens if we do the modularity calculations on networks with any isolated nodes removed!

noIso <- lapply(modelGraphs, function(x){
  g <- delete_vertices(x, which(degree(x) == 0))
})

How does modularity behave over time when we remove isolated nodes?

clustered_noIso <- lapply(noIso, function(x){
  cluster_fast_greedy(x)
 })
  
mods_noIso <- data.frame(slice = 1:length(clustered_noIso),
                           modularity = unlist(lapply(clustered_noIso, modularity)),
                           nClusters = unlist(lapply(clustered_noIso, length)),
                           nIndivs = unlist(lapply(clustered_noIso, function(x){length(membership(x))})))

mods_noIso %>%
  ggplot(aes(x = slice, y = modularity))+
  geom_line()+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 1, col = "red")+
  ggtitle("modularity over time, without isolated nodes")

This may look exactly the same as the above graph. That’s because when N is large, there are hardly any isolated nodes.

First delta: immediately before removal to after removal/pre-rewiring

delta1_modularity <- modularity(cluster_fast_greedy(modelGraphs[[burn.in+1]]))-modularity(cluster_fast_greedy(modelGraphs[[burn.in]]))
delta1_modularity_noIso <- modularity(cluster_fast_greedy(noIso[[burn.in+1]]))-modularity(cluster_fast_greedy(noIso[[burn.in]]))

Second delta: after removal/pre-rewiring to post-rewiring

delta2_modularity <- modularity(cluster_fast_greedy(modelGraphs[[burn.in+2]]))-modularity(cluster_fast_greedy(modelGraphs[[burn.in+1]]))
delta2_modularity_noIso <- modularity(cluster_fast_greedy(noIso[[burn.in+2]]))-modularity(cluster_fast_greedy(noIso[[burn.in+1]]))

What should our measure be?

I am not at all convinced that these momentary deltas are what I should be using. Seems like we need to at least consider two time steps out, since the model relies on this. Looking at the graphs above shows me how different the longer-term trends are from the short-term fluctuations.

Let’s run the model 100 times and see if trends emerge in how to measure.

nRuns <- 100
modelRuns <- vector(mode = "list", length = nRuns)
for(i in 1:nRuns){
  modelRuns[[i]] <- runModel(N = N, # Nodes in the network
                     edge.prob = edge.prob, 
                     burn.in = burn.in,
                     burn.out = burn.out,
                     pm = pm,
                     ps = ps, 
                     pa = pa,
                     add00 = add00, 
                     lose01 = lose01, 
                     add10 = add10,
                     lose11 = lose11, 
                     histMultiplier = histMultiplier,
                     doRemoval = doRemoval) %>%
  lapply(., function(x){
    igraph::graph_from_adjacency_matrix(x, mode = "undirected")
  })
}

Compute the three network-level measures for each of the model runs.

densList <- lapply(modelRuns, function(x){
  lapply(x, function(y){
    edge_density(y)
  }) %>%
    unlist() %>%
    as.data.frame() %>%
    setNames(., "density") %>%
    mutate(slice = 1:nrow(.))
})
densDF <- data.table::rbindlist(densList, idcol = "run")

connList <- lapply(modelRuns, function(x){
  lapply(x, function(y){
    mean_distance(y)
  }) %>%
    unlist() %>%
    as.data.frame() %>%
    setNames(., "connectivity") %>%
    mutate(slice = 1:nrow(.))
})
connDF <- data.table::rbindlist(connList, idcol = "run")

moduList <- lapply(modelRuns, function(x){
    clustered <- lapply(x, cluster_fast_greedy)
    df <- data.frame(slice = 1:length(clustered),
                     modularity = unlist(lapply(clustered, modularity)),
                     nClusters = unlist(lapply(clustered, length)),
                     nIndivs = unlist(lapply(clustered, function(i){length(membership(i))})))
    return(df)
})
moduDF <- data.table::rbindlist(moduList, idcol = "run")

Separate chunk for doing the calculations on the no-isolated-nodes graphs, so we don’t unnecessarily suppress warnings for the other parts of the code.

modelRuns_noIso <- lapply(modelRuns, function(x){
  lapply(x, function(y){
    delete_vertices(y, which(degree(y) == 0))
  })
})
moduList_noIso <- lapply(modelRuns, function(x){
    noIso <- lapply(x, function(a){
      delete_vertices(a, which(degree(a) == 0))
    })
    clustered <- lapply(noIso, cluster_fast_greedy)
    df <- data.frame(slice = 1:length(clustered),
                     modularity = unlist(lapply(clustered, modularity)),
                     nClusters = unlist(lapply(clustered, length)),
                     nIndivs = unlist(lapply(clustered, function(i){length(membership(i))})))
    return(df)
})
moduDF_noIso <- data.table::rbindlist(moduList_noIso, idcol = "run")

Make some plots and try to detect any changepoint trends

densDF %>%
  ggplot(aes(x = slice, y = density))+
  geom_line(aes(group = run, col = run), size = 0.1)+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 0.5, col = "red")+
  scale_color_viridis_c()

connDF %>%
  ggplot(aes(x = slice, y = connectivity))+
  geom_line(aes(group = run, col = run), size = 0.1)+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 0.5, col = "red")+
  scale_color_viridis_c()
## Warning: Removed 100 row(s) containing missing values (geom_path).

moduDF %>%
  ggplot(aes(x = slice, y = modularity))+
  geom_line(aes(group = run, col = run), size = 0.1)+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 0.5, col = "red")+
  scale_color_viridis_c() +
  ggtitle("includes isolated nodes")

moduDF_noIso %>%
  ggplot(aes(x = slice, y = modularity))+
  geom_line(aes(group = run, col = run), size = 0.1)+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 0.5, col = "red")+
  scale_color_viridis_c()+
  ggtitle("no isolated nodes")

# This should look basically the same as the previous graph, since there are very few isolated nodes.
moduDF %>%
  filter(slice > 3) %>%
  ggplot(aes(x = slice, y = nClusters))+
  geom_line(aes(group = run, col = run), size = 0.1)+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 0.5, col = "red")+
  scale_color_viridis_c()

moduDF %>%
  ggplot(aes(x = slice, y = modularity/nClusters))+
  geom_line(aes(group = run, col = run), size = 0.1)+
  theme_minimal()+
  geom_vline(aes(xintercept = burn.in+1), size = 0.5, col = "red")+
  scale_color_viridis_c()

# Absolutely no pattern at all in the ratio of clusters to modularity.